## Summary statistics of response variables using the pooled cross-section

p_load(Hmisc)

svysummary_acc_resp <- data.frame(matrix(ncol = 4, nrow = 0))
svysummary_lpg_resp <- data.frame(matrix(ncol = 4, nrow = 0))
svysummary_elec_resp <- data.frame(matrix(ncol = 4, nrow = 0))

#survey design with all households
for (i in acc_respvars) {
  for (j in c(0,1)) {
    for (k in c(0,1)){
      p <- pool %>% filter(scst == j, wave == k)
      svym <- svymean(as.formula(paste("~",i,sep="")),
                      design = subset(svydesign, scst == j & wave == k),
                      na.rm = T)
      m = wtd.mean(p[[i]],p$weights)
      sd = sqrt(wtd.var(p[[i]],p$weights))
      min = min(p[i], na.rm = T)
      max = max(p[i], na.rm = T)
      df <- data.frame(attribute =  i,
                       scst =  j,
                       wave =  k,
                       n = nrow(na.omit(p[i])),
                       mean = signif(m,3),
                       sd = signif(sd,3),
                       min = min,
                       max = max,
                       se = signif(sqrt(as.numeric(attr(svym, "var"))),3),
                       svym = signif(as.numeric(svym),3))
      svysummary_acc_resp <- rbind(svysummary_acc_resp, df)
      rm(p,m,sd,min,max,df,svym)
    }
  }
}

#subset survey design for those households with LPG connection
for (i in lpg_respvars) {
  for (j in c(0,1)) {
    for (k in c(0,1)){
      p <- pool %>% filter(scst == j, wave == k, uselpg == 1)
      svym <- svymean(as.formula(paste("~",i,sep="")),
                      design = subset(svydesign, scst == j & wave == k & uselpg == 1),
                      na.rm = T)
      m = wtd.mean(p[[i]],p$weights)
      sd = sqrt(wtd.var(p[[i]],p$weights))
      min = min(p[i], na.rm = T)
      max = max(p[i], na.rm = T)
      df <- data.frame(attribute =  i,
                       scst =  j,
                       wave =  k,
                       n = nrow(na.omit(p[i])),
                       mean = signif(m,3),
                       sd = signif(sd,3),
                       min = min,
                       max = max,
                       se = signif(sqrt(as.numeric(attr(svym, "var"))),3),
                       svym = signif(as.numeric(svym),3))
      svysummary_lpg_resp <- rbind(svysummary_lpg_resp, df)
      rm(p,m,sd,min,max,df,svym)
    }
  }
}

#subset survey design for those households with grid connection
for (i in elec_respvars) {
  for (j in c(0,1)) {
    for (k in c(0,1)){
      p <- pool %>% filter(scst == j, wave == k, usegrid == 1)
      svym <- svymean(as.formula(paste("~",i,sep="")),
                      design = subset(svydesign, scst == j & wave == k & usegrid == 1),
                      na.rm = T)
      m = wtd.mean(p[[i]],p$weights)
      sd = sqrt(wtd.var(p[[i]],p$weights))
      min = min(p[i], na.rm = T)
      max = max(p[i], na.rm = T)
      df <- data.frame(attribute =  i,
                       scst =  j,
                       wave =  k,
                       n = nrow(na.omit(p[i])),
                       mean = signif(m,3),
                       sd = signif(sd,3),
                       min = min,
                       max = max,
                       se = signif(sqrt(as.numeric(attr(svym, "var"))),3),
                       svym = signif(as.numeric(svym),3))
      svysummary_elec_resp <- rbind(svysummary_elec_resp, df)
      rm(p,m,sd,min,max,df,svym)
    }
  }
}

svysummary_resp <- rbind(svysummary_acc_resp, svysummary_lpg_resp, svysummary_elec_resp) #necessary for descriptive analysis visualisation

svysummary_resp_clean <- svysummary_resp %>% 
  mutate(attribute = fct_recode(as.factor(attribute),
                                "Grid Connection (==1)" = "usegrid",
                                "LPG Connection (==1)" = "uselpg",
                                "LPG Home Delivery (==1)" = "convenience",
                                "Daily Electricity Hours" = "duration_day",
                                "Evening Electricity Hours" = "duration_night",
                                "Monthly Outage Days" = "reliability", 
                                "Monthly Voltage Fluctuation Days" = "quality_fluc",
                                "Monthly Low Voltage Days" = "quality_low"),
         scst = fct_recode(as.factor(scst),
                           "SC/ST" = "1",
                           "All Others" = "0")) %>% 
  arrange(attribute)

svysummary_resp_wave1 <- svysummary_resp_clean %>% filter(wave == 0) %>% select(-c(wave,se,svym))
svysummary_resp_wave2 <- svysummary_resp_clean %>% filter(wave == 1) %>% select(-c(wave,se,svym))

colnames(svysummary_resp_wave1) <- c("Attribute", "Caste", "Obs. Wave 1", "Mean Wave 1", "SD Wave 1", "Min Wave 1", "Max Wave 1")
colnames(svysummary_resp_wave2) <- c("Attribute", "Caste", "Obs. Wave 2", "Mean Wave 2", "SD Wave 2", "Min Wave 2", "Max Wave 2")

detach(name = package:Hmisc, force = T)